
# k <- readRDS("temp/vf.rds") |> 
#   filter(treated > 0)
# 
# lat <- readRDS("temp/vf_later.rds") |> 
#   mutate(treated = 0)
# 
# saveRDS(select(k, -LALVOTERID, -surname,
#                -street, -num, -rest, -Residence_Addresses_City,
#                -cong, -Voters_BirthDate), "temp/vf_anon.rds")
# 
# saveRDS(select(lat, -LALVOTERID, -surname,
#                -street, -Residence_Addresses_City,
#                -cong, -Voters_BirthDate), "temp/vf_later_anon.rds")

##########################################
##########################################
##########################################
##########################################

# Stack the two anonymized voter-file extracts into a single table.
k <- bind_rows(lapply(c("temp/vf_anon.rds", "temp/vf_later_anon.rds"), readRDS))

# Attach a randomly permuted integer id to every row. No seed is set here, so
# the id assigned to a given row differs from run to run.
k <- mutate(ungroup(k), voter_id = sample.int(n()))

# Row filter:
#   * n_dead must be below 3 or missing;
#   * the row must either have treated == 0 or a death_date on/before
#     2020-12-31.
# Afterwards `age` becomes `deced_age`, both dates are re-expressed as day
# counts from 2010-01-01, and party is turned into two 0/1 indicators.
k <- k |>
  filter((is.na(n_dead) | n_dead < 3) &
           (treated == 0 | death_date <= "2020-12-31")) |>
  rename(deced_age = age) |>
  mutate(
    reg_date = as.numeric(
      as.Date(reg_date, "%m/%d/%Y") - as.Date("2010-01-01")
    ),
    death_date = as.numeric(death_date - as.Date("2010-01-01")),
    dem = as.numeric(party == "Democratic"),
    rep = as.numeric(party == "Republican")
  )



# ---- First comparison: rows with treated > 0 ----
# `t` is TRUE for treated == 2 (labelled "Household Covid Death" later in this
# script); every other row kept here serves as the comparison group.
first_set <- k |>
  filter(treated > 0) |>
  mutate(t = treated == 2)

# Keep only rows that are complete on every covariate and on the 2012-2018
# general-election columns.
first_set <- first_set[
  complete.cases(select(
    first_set,
    lat, lon, voter_age, reg_date, dem, rep,
    deced_age, death_date, n_dead,
    General_2012_11_06, General_2014_11_04,
    General_2016_11_08, General_2018_11_06,
    starts_with("pred."),
    male, median_income, some_college, pop_dens
  )),
]

# Covariate matrix handed to ebalance(). pred.other and pred.aian are left
# out of the balancing set even though they took part in the completeness
# check above.
X <- select(
  first_set,
  lat, lon, voter_age, reg_date, dem, rep, deced_age,
  General_2012_11_06, General_2014_11_04,
  General_2016_11_08, General_2018_11_06,
  death_date, n_dead,
  starts_with("pred."), -pred.other, -pred.aian,
  male, median_income, some_college, pop_dens
)

mb <- ebalance(first_set$t, X)

# Rows with t == TRUE get weight 1; the comparison rows receive mb$w, which is
# attached positionally, so the comparison rows must stay in their current
# order. The result lists the t == TRUE rows first.
first_set <- bind_rows(
  mutate(filter(first_set, t), weight = 1),
  mutate(filter(first_set, !t), weight = mb$w)
)

# Weighted mean of each General_* column by group, one row per group-year.
# The year is read from characters 9-12 of the column name.
ll <- first_set |>
  group_by(t) |>
  summarise(across(starts_with("Gener"), \(v) weighted.mean(v, weight))) |>
  pivot_longer(starts_with("Gen")) |>
  mutate(name = as.integer(substr(name, 9, 12)))

# Same summary without weights, kept for comparison.
ll2 <- first_set |>
  group_by(t) |>
  summarise(across(starts_with("Gener"), \(v) mean(v))) |>
  pivot_longer(starts_with("Gen")) |>
  mutate(name = as.integer(substr(name, 9, 12)))

ggplot(data = ll, mapping = aes(x = name, y = value, color = t)) +
  geom_line()

# Weighted covariate means by group, for eyeballing balance after weighting.
check <- first_set |>
  group_by(t) |>
  summarise(across(
    c(lat, lon, voter_age, reg_date, deced_age, dem, rep,
      death_date, n_dead,
      starts_with("pred."),
      male, median_income, some_college, pop_dens),
    \(v) weighted.mean(v, weight)
  ))
###################################################

# ---- Second comparison: rows with treated != 1 ----
# `t` is TRUE for treated == 2; the other rows kept here (treated == 0 is
# labelled "No Household Deaths" later in this script) form the comparison
# group.
second_set <- k |>
  filter(treated != 1) |>
  mutate(t = treated == 2)

# Completeness check on covariates and 2012-2018 general-election columns.
# Unlike the first comparison, death_date is not required here.
second_set <- second_set[
  complete.cases(select(
    second_set,
    lat, lon, voter_age, reg_date, dem, rep,
    deced_age, n_dead,
    General_2012_11_06, General_2014_11_04,
    General_2016_11_08, General_2018_11_06,
    starts_with("pred."),
    male, median_income, some_college, pop_dens
  )),
]

# Covariate matrix for ebalance(); pred.other and pred.aian are excluded.
X <- select(
  second_set,
  lat, lon, voter_age, reg_date, dem, rep, deced_age,
  General_2012_11_06, General_2014_11_04,
  General_2016_11_08, General_2018_11_06,
  n_dead,
  starts_with("pred."),
  male, median_income, some_college, pop_dens,
  -pred.other, -pred.aian
)

mb <- ebalance(second_set$t, X)

# t == TRUE rows get weight 1; comparison rows get mb$w, attached by position.
second_set <- bind_rows(
  mutate(filter(second_set, t), weight = 1),
  mutate(filter(second_set, !t), weight = mb$w)
)

# Weighted mean of each General_* column by group; year is taken from
# characters 9-12 of the column name.
ll <- second_set |>
  group_by(t) |>
  summarise(across(starts_with("Gener"), \(v) weighted.mean(v, weight))) |>
  pivot_longer(starts_with("Gen")) |>
  mutate(name = as.integer(substr(name, 9, 12)))

ggplot(data = ll, mapping = aes(x = name, y = value, color = t)) +
  geom_line()

##########################

# ---- Long panel: one row per record and election year ----
# Combine every row of the first comparison with the comparison (!t) rows of
# the second one, then reshape the General_* columns to long form. `year` is
# parsed from characters 9-12 of the column name.
#   t1: treated > 0
#   t2: treated > 1
full <- bind_rows(first_set, filter(second_set, !t)) |>
  pivot_longer(starts_with("Gener"),
               names_to = "year",
               values_to = "turnout") |>
  mutate(year = as.integer(substring(year, 9, 12)),
         t1 = treated > 0,
         t2 = treated > 1)

# Weighted and unweighted mean turnout per treatment code and year, stacked
# into long form (`pan` says which of the two) and labelled for plotting.
# summarize() is left at its default grouping behaviour, so the saved object
# remains grouped by `treated`.
ll <- full |>
  group_by(treated, year) |>
  summarize(Weighted = weighted.mean(turnout, weight),
            Unweighted = mean(turnout)) |>
  pivot_longer(cols = c(Weighted, Unweighted),
               names_to = "pan",
               values_to = "turnout") |>
  mutate(group = case_when(
    treated == 0 ~ "No Household Deaths",
    treated == 1 ~ "Household Non-Covid Death",
    treated == 2 ~ "Household Covid Death"
  ))

saveRDS(ll, "temp/weighted_full_ll.rds")

#############################

# ---- Regressions on the weighted long panel ----
# m1: turnout on the full interaction of t1, t2 and a 2020 indicator, with
# year and voter_id fixed effects, two-way clustered on voter_id and year,
# weighted by `weight`.
m1 <- fixest::feols(
  turnout ~ t1 * t2 * I(year == 2020) | year + voter_id,
  full,
  cluster = c("voter_id", "year"),
  weights = full$weight
)

# m2: same specification, additionally interacted with the `rep` indicator.
m2 <- fixest::feols(
  turnout ~ t1 * t2 * I(year == 2020) * rep | year + voter_id,
  full,
  cluster = c("voter_id", "year"),
  weights = full$weight
)

saveRDS(m1, "temp/wa_model_1.rds")
saveRDS(m2, "temp/wa_model_2.rds")

# For each y in 2014, 2016, 2018, 2020: refit the m1 specification on years up
# to and including y, using y as the indicator year, and keep the confidence
# intervals.
# NOTE(review): the two labels assume confint() returns exactly two rows, in
# the order "any death" then "covid death" - confirm against the fitted model.
ests <- rbindlist(lapply(seq(2014, 2020, 2), function(y) {
  panel <- filter(full, year <= y)

  fit <- fixest::feols(
    turnout ~ t1 * t2 * I(year == y) | year + voter_id,
    panel,
    cluster = c("voter_id", "year"),
    weights = panel$weight
  )

  ci <- confint(fit)
  ci$year <- y
  ci$est <- c("any death", "covid death")
  ci
}))

saveRDS(ests, "temp/es_wa.rds")

######################################
######################################
######################################
######################################

# ---- Balance table (written to temp/balance.tex) ----
# The covariates summarised below do not vary across the election-year rows of
# the long panel, so a single year is kept to avoid counting records once per
# election.
full <- as.data.table(filter(full, year == 2012))

# Covariates reported in the table.
bal_vars <- c("reg_date", "deced_age", "death_date", "n_dead",
              "pred.white", "pred.black", "pred.hisp", "pred.asian",
              "male", "voter_age", "dem", "rep",
              "median_income", "some_college", "pop_dens")

# Display order of the three groups within each panel of the table.
group_labels <- c("Household Covid Death",
                  "Household Non-Covid Death",
                  "No Household Death")

# Turn a table of per-group means (one row per `treated` code, one column per
# covariate) into display strings, reshaped to one row per covariate (`name`)
# and one column per group label.
#   * reg_date / death_date are day offsets from 2010-01-01 and are printed
#     as dates;
#   * counts and ages use one decimal, shares are percentages, income is in
#     dollars.
format_balance <- function(means) {
  means |>
    mutate(
      across(c(reg_date, death_date),
             ~ format(as.Date("2010-01-01") + ., "%B %d, %Y")),
      across(c(deced_age, n_dead, pop_dens, voter_age), ~ comma(., .1)),
      across(c(starts_with("pred"), male, some_college, dem, rep),
             ~ percent(., .1)),
      across(c(median_income), ~ dollar(., 1))
    ) |>
    pivot_longer(cols = -treated) |>
    mutate(treated = case_when(treated == 0 ~ "No Household Death",
                               treated == 1 ~ "Household Non-Covid Death",
                               TRUE ~ "Household Covid Death")) |>
    pivot_wider(names_from = treated, values_from = value)
}

# Unweighted and weighted group means, formatted identically.
unw <- format_balance(
  full[, lapply(.SD, mean), by = treated, .SDcols = bal_vars]
)

w <- format_balance(
  full[, lapply(.SD, weighted.mean, weight), by = treated, .SDcols = bal_vars]
)

# Side-by-side panels. The death-date cell is blanked for the no-death group
# in both panels.
tab <- left_join(unw, w, by = "name", suffix = c(".unw", ".w")) |>
  mutate(across(starts_with("No H"), ~ ifelse(name == "death_date", "", .)))

# Row order and display labels for the covariates.
# NOTE(review): the join below relies on the columns shared between `tab` and
# the CSV (presumably just `name`) - confirm against raw_data/var_orders.csv.
vo <- fread("raw_data/var_orders.csv")

# Columns are selected by name, so the header assigned below cannot drift out
# of step with the data if the groups appear in a different order upstream.
tab <- left_join(tab, vo) |>
  arrange(order) |>
  select(clean,
         all_of(paste0(group_labels, ".unw")),
         all_of(paste0(group_labels, ".w")))

# The table is emitted with escape = FALSE, so LaTeX specials in the cells
# must be escaped by hand: "%" everywhere (as before) and the "$" produced by
# dollar() in the value columns. The label column is left alone for "$" in
# case it carries intentional LaTeX markup.
tab <- tab |>
  mutate(across(everything(), ~ gsub("%", "\\\\%", .)),
         across(-clean, ~ gsub("\\$", "\\\\$", .)))

colnames(tab) <- c(" ", rep(group_labels, 2))

options(knitr.kable.NA = "")

# Shade every even-numbered row. Vectorised so that tables with fewer than
# two rows are handled without indexing past the end.
shade <- seq_len(nrow(tab)) %% 2 == 0
tab$` `[shade] <- paste0("\\rowcolor{Gray}", tab$` `[shade])

kable(tab, "latex",
      caption = "\\label{tab:full-bal} Balance Table for Entropy Balancing",
      linesep = "", align = c("l", rep("c", 6)),
      booktabs = TRUE, escape = FALSE) |>
  add_header_above(c(" " = 1,
                     "Means: Unweighted Data" = 3,
                     "Means: Weighted Data" = 3),
                   align = "c") |>
  column_spec(c(1), width = "4cm") |>
  column_spec(c(2:7), width = "3cm") |>
  kable_styling(latex_options = c("scale_down")) |>
  save_kable("temp/balance.tex")

